Licensed under the Creative Commons attribution-noncommercial license, http://creativecommons.org/licenses/by-nc/3.0/. Please share and remix noncommercially, mentioning its origin.
CC-BY_NC

Produced in R version 3.2.1 using pomp version 0.72.2.

Model implementation

Download the data from the WHO Situation Report of 1 October 2014:

baseurl <- "http://kinglab.eeb.lsa.umich.edu/SBIED/data/"
read.csv(paste0(baseurl,"ebola_data.csv"),stringsAsFactors=FALSE,
         colClasses=c(date="Date")) -> dat
str(dat)
## 'data.frame':    74 obs. of  4 variables:
##  $ week   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ date   : Date, format: "2014-01-05" "2014-01-12" ...
##  $ country: chr  "Guinea" "Guinea" "Guinea" "Guinea" ...
##  $ cases  : num  2.244 2.244 0.073 5.717 3.954 ...
head(dat)
##   week       date country cases
## 1    1 2014-01-05  Guinea 2.244
## 2    2 2014-01-12  Guinea 2.244
## 3    3 2014-01-19  Guinea 0.073
## 4    4 2014-01-26  Guinea 5.717
## 5    5 2014-02-02  Guinea 3.954
## 6    6 2014-02-09  Guinea 5.444
## Population sizes in Guinea, Liberia, and Sierra Leone (census 2014)
populations <- c(Guinea=10628972,Liberia=4092310,SierraLeone=6190280)
dat %>%
  ggplot(aes(x=date,y=cases,group=country,color=country))+
  geom_line()

toEst <- Csnippet('
  const double *IC = &S_0;
  double *TIC = &TS_0;
  TR0 = log(R0);
  Trho = logit(rho);
  Tk = log(k);
  to_log_barycentric(TIC,IC,4);
')

fromEst <- Csnippet('
  const double *IC = &S_0;
  double *TIC = &TS_0;
  TR0 = exp(R0);
  Trho = expit(rho);
  Tk = exp(k);
  from_log_barycentric(TIC,IC,4);
')

Measurement model: hierarchical model for cases

\(C_t | H_t\) is negative binomial with \({\mathbb{E}\left[{C_t|H_t}\right]} = \rho\,H_t\) and \({\mathrm{Var}\left[{C_t|H_t}\right]} = \rho\,H_t\,(1+k\,\rho\,H_t)\).

dObs <- Csnippet('
  double f;
  if (k > 0.0)
    f = dnbinom_mu(nearbyint(cases),1.0/k,rho*N_EI,1);
  else
    f = dpois(nearbyint(cases),rho*N_EI,1);
  lik = (give_log) ? f : exp(f);
')

rObs <- Csnippet('
  if (k > 0) {
    cases = rnbinom_mu(1.0/k,rho*N_EI);
  } else {
    cases = rpois(rho*N_EI);
  }')

Process model simulator

rSim <- Csnippet('
  double lambda, beta;
  double *E = &E1;
  beta = R0 * gamma; // Transmission rate
  lambda = beta * I / N; // Force of infection
  int i;

  // Transitions
  // From class S
  double transS = rbinom(S, 1.0 - exp(- lambda * dt)); // No of infections
  // From class E
  double transE[nstageE]; // No of transitions between classes E
  for(i = 0; i < nstageE; i++){
    transE[i] = rbinom(E[i], 1.0 - exp(-nstageE * alpha * dt));
  }
  // From class I
  double transI = rbinom(I, 1.0 - exp(-gamma * dt)); // No of transitions I->R

  // Balance the equations
  S -= transS;
  E[0] += transS - transE[0];
  for(i=1; i < nstageE; i++) {
    E[i] += transE[i-1] - transE[i];
  }
  I += transE[nstageE-1] - transI;
  R += transI;
  N_EI += transE[nstageE-1]; // No of transitions from E to I
  N_IR += transI; // No of transitions from I to R
')

Deterministic skeleton (an ODE), used in trajectory matching

skel <- Csnippet('
  double lambda, beta;
  const double *E = &E1;
  double *DE = &DE1;
  beta = R0 * gamma; // Transmission rate
  lambda = beta * I / N; // Force of infection
  int i;

  // Balance the equations
  DS = - lambda * S;
  DE[0] = lambda * S - nstageE * alpha * E[0];
  for (i=1; i < nstageE; i++)
    DE[i] = nstageE * alpha * (E[i-1]-E[i]);
  DI = nstageE * alpha * E[nstageE-1] - gamma * I;
  DR = gamma * I;
  DN_EI = nstageE * alpha * E[nstageE-1];
  DN_IR = gamma * I;
')
ebolaModel <- function (country=c("Guinea", "SierraLeone", "Liberia"),
                        timestep = 0.1) {

  ctry <- match.arg(country)
  pop <- unname(populations[ctry])
 
  ## Incubation period is supposed to be Gamma distributed with shape parameter 3
  ## and mean 11.4 days.  The discrete-time formula is used to calculate the
  ## corresponding alpha (cf He et al., Interface 2010).
  ## Case-fatality ratio is fixed at 0.7 (cf WHO Ebola response team, NEJM 2014)
  incubation_period <- 11.4/7
  infectious_period <- 7/7
  index_case <- 10/pop
  dt <- timestep
  nstageE <- 3L

  globs <- paste0("static int nstageE = ",nstageE,";");

  theta <- c(N=pop,R0=1.4,
             alpha=-1/(nstageE*timestep)*log(1-nstageE*timestep/incubation_period),
             gamma=-log(1-timestep/infectious_period)/timestep,
             rho=0.2,cfr=0.7,
             k=0,
             S_0=1-index_case,E_0=index_case/2-5e-9,
             I_0=index_case/2-5e-9,R_0=1e-8)

  dat <- subset(dat,country==ctry,select=-country)

  ## Create the pomp object
  dat %>% 
    extract(c("week","cases")) %>%
    pomp(
      times="week",
      t0=min(dat$week)-1,
      params=theta,
      globals=globs,
      statenames=c("S","E1","I","R","N_EI","N_IR"),
      zeronames=c("N_EI","N_IR"),
      paramnames=c("N","R0","alpha","gamma","rho","k","cfr",
                   "S_0","E_0","I_0","R_0"),
      nstageE=nstageE,
      dmeasure=dObs,
      rmeasure=rObs,
      rprocess=discrete.time.sim(step.fun=rSim,delta.t=timestep),
      skeleton=skel,
      skeleton.type="vectorfield",
      toEstimationScale=toEst,
      fromEstimationScale=fromEst,
      initializer=function (params, t0, nstageE, ...) {
        all.state.names <- c("S",paste0("E",1:nstageE),"I","R","N_EI","N_IR")
        comp.names <- c("S",paste0("E",1:nstageE),"I","R")
        x0 <- setNames(numeric(length(all.state.names)),all.state.names)
        frac <- c(params["S_0"],rep(params["E_0"]/nstageE,nstageE),params["I_0"],params["R_0"])
        x0[comp.names] <- round(params["N"]*frac/sum(frac))
        x0
      }
    ) -> po
}

ebolaModel("Guinea") -> gin
ebolaModel("SierraLeone") -> sle
ebolaModel("Liberia") -> lbr

Parameter estimation

The following are the contents of the file profiles.R, which uses trajectory matching and iterated filtering to estimate model parameters for all four countries and both types of data. Again, this is designed to be run on an MPI cluster. In a directory with the files hosts and ebola-model.R (see above), execute these computations via a command like

mpirun -hostfile hosts -np 101 Rscript --vanilla profiles.R

Contents of file profiles.R:

require(pomp)
require(plyr)
require(reshape2)
require(magrittr)
options(stringsAsFactors=FALSE)

require(foreach)
require(doMPI)
require(iterators)

source("ebola-model.R")

foreach (country=c("SierraLeone","Liberia","Guinea","WestAfrica"),
         .inorder=TRUE,.combine=c) %:%
  foreach (type=c("raw","cum"),.inorder=TRUE,.combine=c) %do%
  {
    ebolaModel(country=country,type=type,na.rm=TRUE)
    } -> models
dim(models) <- c(2,4)
dimnames(models) <- list(c("raw","cum"),
                         c("SierraLeone","Liberia","Guinea","WestAfrica"))

noexport <- c("models")

cl <- startMPIcluster()
registerDoMPI(cl)

bake <- function (file, expr) {
  if (file.exists(file)) {
    readRDS(file)
    } else {
      val <- eval(expr)
      saveRDS(val,file=file)
      val
      }
  }

## trajectory matching: R0 profile

bake(file="ebola-tm-fits-R0.rds",{
  
  starts <- profileDesign(R0=seq(1,3,length=200),
                          upper=c(k=1),
                          lower=c(k=1e-8),
                          nprof=40)
  
  foreach (start=iter(starts,by='row'),
           .combine=rbind,.inorder=FALSE,
           .noexport=noexport,
           .options.mpi=list(chunkSize=100,seed=2016138277L,info=TRUE)
           ) %:%
    foreach (type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %:%
    foreach (country=c("SierraLeone","Liberia","Guinea","WestAfrica"),
             .combine=rbind,.inorder=FALSE) %dopar%
    {
      tm <- models[type,country][[1]]
      tic <- Sys.time()
      coef(tm,names(start)) <- unname(unlist(start))
      coef(tm,"rho") <- 0.2
      tm <- traj.match(tm,est=c("k","E_0","I_0"),transform=TRUE)
      if (coef(tm,"k")==0) coef(tm,"k") <- 1e-8
      if (coef(tm,"E_0")==0) coef(tm,"E_0") <- 1e-12
      if (coef(tm,"I_0")==0) coef(tm,"I_0") <- 1e-12
      tm <- traj.match(tm,method='subplex',control=list(maxit=1e5))
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      data.frame(country=country,type=type,as.list(coef(tm)),
                 loglik=logLik(tm),conv=tm$convergence,
                 etime=as.numeric(etime))
      } %>% mutate(sum=S_0+E_0+I_0+R_0,
                   S_0=round(N*S_0/sum),
                   E_0=round(N*E_0/sum),
                   I_0=round(N*I_0/sum),
                   R_0=round(N*R_0/sum)) %>%
    subset(conv %in% c(0,1),select=-sum) %>%
    unique()
  
  }) -> profR0

## trajectory matching: k profile

bake(file="ebola-tm-fits-k.rds",{
  
  starts <- profileDesign(k=seq(0,1,length=100),
                          upper=c(R0=1),
                          lower=c(R0=3),
                          nprof=40)
  
  foreach (start=iter(starts,by='row'),
           .combine=rbind,.inorder=FALSE,
           .noexport=noexport,
           .options.mpi=list(chunkSize=100,seed=2016138277L,info=TRUE)
           ) %:%
    foreach (type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %:%
    foreach (country=c("SierraLeone","Liberia","Guinea","WestAfrica"),
             .combine=rbind,.inorder=FALSE) %dopar%
    {
      tm <- models[type,country][[1]]
      tic <- Sys.time()
      coef(tm,names(start)) <- unname(unlist(start))
      coef(tm,"rho") <- 0.2
      tm <- traj.match(tm,est=c("R0","E_0","I_0"),transform=TRUE)
      if (coef(tm,"E_0")==0) coef(tm,"E_0") <- 1e-12
      if (coef(tm,"I_0")==0) coef(tm,"I_0") <- 1e-12
      tm <- traj.match(tm,method='subplex',control=list(maxit=1e5))
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      data.frame(country=country,type=type,as.list(coef(tm)),
                 loglik=logLik(tm),conv=tm$convergence,
                 etime=as.numeric(etime))
      } %>% mutate(sum=S_0+E_0+I_0+R_0,
                   S_0=round(N*S_0/sum),
                   E_0=round(N*E_0/sum),
                   I_0=round(N*I_0/sum),
                   R_0=round(N*R_0/sum)) %>%
    subset(conv %in% c(0,1),select=-sum) %>%
    unique()
  
  }) -> profk

## All trajectory matching computations
ldply(list(R0=profR0,k=profk),.id='profile') -> profTM

## Iterated filtering, R0 profile

bake(file="ebola-if-fits-R0_a.rds",{
  
  profTM %>% subset(profile=="R0") %>%
    ddply(~country+type,subset,
          is.finite(loglik)&loglik>max(loglik)-20) %>%
    ddply(~country+type+R0,subset,
          loglik==max(loglik),
          select=-c(loglik,etime,conv,profile)) -> pars
  
  foreach (start=iter(pars,by='row'),
           .combine=rbind,.inorder=FALSE,
           .options.mpi=list(chunkSize=1,seed=1264624821L),
           .noexport=noexport) %dopar%
    {
      tic <- Sys.time()
      
      country <- as.character(start$country)
      type <- as.character(start$type)
      st <- unlist(subset(start,select=-c(country,type)))
      
      po <- models[type,country][[1]]
      coef(po,names(st)) <- st
      if (coef(po,"E_0")==0) coef(po,"E_0") <- 1e-5
      if (coef(po,"I_0")==0) coef(po,"I_0") <- 1e-5
      
      mf <- mif(po, Nmif=10,
                rw.sd = c(k=0.02,E_0=1,I_0=1),
                ivps = c("E_0","I_0"),
                Np = 2000,
                var.factor = 2,
                method = "mif2",
                cooling.type = "hyperbolic",
                cooling.fraction = 0.5,
                transform = TRUE,
                verbose = FALSE)
      mf <- continue(mf, Nmif = 50, cooling.fraction = 0.1)
      
      ## Runs 10 particle filters to assess Monte Carlo error in likelihood
      pf <- replicate(10,pfilter(mf,Np=5000,max.fail=Inf))
      ll <- sapply(pf,logLik)
      ll <- logmeanexp(ll, se = TRUE)
      nfail <- sapply(pf,getElement,"nfail")
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      
      data.frame(country=country,type=type,as.list(coef(mf)),
                 loglik = ll[1],
                 loglik.se = ll[2],
                 nfail.min = min(nfail),
                 nfail.max = max(nfail),
                 etime = as.numeric(etime))
      }
  }) -> profR0

## Filter once more on maxima

bake(file="ebola-if-fits-R0.rds",{
  
  profR0 %>% subset(is.finite(loglik)&nfail.max==0) %>%
    ddply(~country+type+R0,subset,rank(-loglik)<=5) %>%
    subset(select=-c(loglik,loglik.se,nfail.max,nfail.min,etime)) -> pars
  
  foreach (start=iter(pars,by='row'),
           .combine=rbind,.inorder=FALSE,
           .options.mpi=list(chunkSize=1,seed=1264624821L),
           .noexport=noexport) %dopar%
    {
      tic <- Sys.time()
      
      country <- as.character(start$country)
      type <- as.character(start$type)
      st <- unlist(subset(start,select=-c(country,type)))
      
      po <- models[type,country][[1]]
      coef(po,names(st)) <- unname(st)
      
      ## Runs 10 particle filters to assess Monte Carlo error in likelihood
      pf <- try(replicate(10,pfilter(po,Np=5000,max.fail=Inf)))
      
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      
      ll <- sapply(pf,logLik)
      ll <- logmeanexp(ll, se = TRUE)
      nfail <- sapply(pf,getElement,"nfail")
      
      data.frame(country=country,type=type,as.list(coef(po)),
                 loglik = ll[1],
                 loglik.se = ll[2],
                 nfail.min = min(nfail),
                 nfail.max = max(nfail),
                 etime = as.numeric(etime))
      }
  }) -> profR0

## Iterated filtering, k profile

bake(file="ebola-if-fits-k_a.rds",{
  
  profTM %>% subset(profile=="k") %>%
    ddply(~country+type,subset,
          is.finite(loglik)&loglik>max(loglik)-20) %>%
    ddply(~country+type+k,subset,
          loglik==max(loglik),
          select=-c(loglik,etime,conv,profile)) -> pars
  
  foreach (start=iter(pars,by='row'),
           .combine=rbind,.inorder=FALSE,
           .options.mpi=list(chunkSize=1,seed=1264624821L),
           .noexport=noexport) %dopar%
    {
      tic <- Sys.time()
      
      country <- as.character(start$country)
      type <- as.character(start$type)
      st <- unlist(subset(start,select=-c(country,type)))
      
      po <- models[type,country][[1]]
      coef(po,names(st)) <- st
      if (coef(po,"E_0")==0) coef(po,"E_0") <- 1e-5
      if (coef(po,"I_0")==0) coef(po,"I_0") <- 1e-5
      
      mf <- mif(po, Nmif=10,
                rw.sd = c(R0=0.02,E_0=1,I_0=1),
                ivps = c("E_0","I_0"),
                Np = 2000,
                var.factor = 2,
                method = "mif2",
                cooling.type = "hyperbolic",
                cooling.fraction = 0.5,
                transform = TRUE,
                verbose = FALSE)
      mf <- continue(mf, Nmif = 50, cooling.fraction = 0.1)
      
      ## Runs 10 particle filters to assess Monte Carlo error in likelihood
      pf <- replicate(10,pfilter(mf,Np=5000,max.fail=Inf))
      ll <- sapply(pf,logLik)
      ll <- logmeanexp(ll, se = TRUE)
      nfail <- sapply(pf,getElement,"nfail")
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      
      data.frame(country=country,type=type,as.list(coef(mf)),
                 loglik = ll[1],
                 loglik.se = ll[2],
                 nfail.min = min(nfail),
                 nfail.max = max(nfail),
                 etime = as.numeric(etime))
      }
  }) -> profk

## Filter once more on maxima

bake(file="ebola-if-fits-k.rds",{
  
  profk %>% subset(is.finite(loglik)&nfail.max==0) %>%
    ddply(~country+type+R0,subset,rank(-loglik)<=5) %>%
    subset(select=-c(loglik,loglik.se,nfail.max,nfail.min,etime)) -> pars
  
  foreach (start=iter(pars,by='row'),
           .combine=rbind,.inorder=FALSE,
           .options.mpi=list(chunkSize=1,seed=1264624821L),
           .noexport=noexport) %dopar%
    {
      tic <- Sys.time()
      
      country <- as.character(start$country)
      type <- as.character(start$type)
      st <- unlist(subset(start,select=-c(country,type)))
      
      po <- models[type,country][[1]]
      coef(po,names(st)) <- unname(st)
      
      ## Runs 10 particle filters to assess Monte Carlo error in likelihood
      pf <- try(replicate(10,pfilter(po,Np=5000,max.fail=Inf)))
      
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      
      ll <- sapply(pf,logLik)
      ll <- logmeanexp(ll, se = TRUE)
      nfail <- sapply(pf,getElement,"nfail")
      
      data.frame(country=country,type=type,as.list(coef(po)),
                 loglik = ll[1],
                 loglik.se = ll[2],
                 nfail.min = min(nfail),
                 nfail.max = max(nfail),
                 etime = as.numeric(etime))
      }
  }) -> profk

ldply(list(R0=profR0,k=profk),.id='profile') -> profIF

ldply(list(det=profTM,stoch=profIF),.id='model') %>%
  saveRDS(file='ebola-profiles.rds')

closeCluster(cl)
mpi.quit()

Executing this code will result in the creation of several files, of which ebola-profiles.rds is the most important, containing as it does the results of all the parameter estimation.

The following plots the results in various ways:

require(reshape2)
require(plyr)
require(magrittr)
require(ggplot2)
theme_set(theme_bw())

readRDS("ebola-profiles.rds") %>% 
  melt(id=c("model","profile","country","type","loglik")) %>%
  mutate(variable=as.character(variable),profile=as.character(profile)) %>% 
  subset(variable==profile) %>%
  ddply(~country+type+model,mutate,dll=loglik-max(loglik)) -> x

x %>%
  ddply(~country+type+model+profile+value,subset,loglik==max(loglik)) -> xx

pls1 <- dlply(x,~country,function (x)  {
  ggplot(data=x,mapping=aes(x=value,y=dll,color=model))+
    geom_point(alpha=0.2)+
    facet_wrap(~type+profile,scales='free')+
    labs(title=unique(x$country))
  })

pls2 <- llply(names(pls1),function(n) {
  pls1[[n]] %+% subset(x,country==n & dll>-20)
  })

pls3 <- llply(names(pls1),function (n) 
  {
  pls1[[n]] %+% subset(xx,country==n & dll>-20)
  })

print(pls1)

print(pls2)

print(pls3)

Diagnostics

The file diagnostics.R, whose contents are shown below, computes the diagnostics displayed in the paper. These computations are not very heavy, but can be accelerated using doMC if run on a multi-core workstation. In a directory containing ebola-profiles.rds (see above) and ebola-model.R, execute these codes with a command like

Rscript --vanilla diagnostics.R

Contents of file diagnostics.R:

library(pomp)
library(plyr)
library(reshape2)
library(magrittr)
options(stringsAsFactors=FALSE)

require(foreach)
require(doMC)
require(iterators)

source("ebola-model.R")

readRDS("ebola-profiles.rds") %>%
  ddply(~country+type+model,subset,loglik==max(loglik)) %>%
  subset(type=="raw"&model=="stoch") -> mles

time1 <- c(Guinea="2014-01-05",Liberia="2014-06-01",
           SierraLeone="2014-06-08",WestAfrica="2014-01-05")

registerDoMC(4)

foreach (mle=iter(mles,by='row'),.combine=rbind) %dopar%
  {
    country=as.character(mle$country)
    type=as.character(mle$type)
    M <- ebolaModel(country=country,type=type,
                    na.rm=TRUE,nstage=3,timestep=0.01)
    p <- unlist(subset(mle,select=-c(country,type,model,profile,
                                     loglik,loglik.se,
                                     nfail.min,nfail.max,conv,etime)))
    coef(M,names(p)) <- unname(unlist(p))
    
    t0 <- as.Date(time1[country])
    
    simulate(M,nsim=10,as.data.frame=TRUE,obs=TRUE,include.data=TRUE,seed=2186L) %>%
      mutate(date=t0+time*7,country=country,type=type)
    } %>% saveRDS(file="ebola-diagnostics-sim.rds")

foreach (mle=iter(mles,by='row'),.combine=rbind) %dopar%
  {
    country=as.character(mle$country)
    type=as.character(mle$type)
    M <- ebolaModel(country=country,type=type,
                    na.rm=TRUE,nstage=3,timestep=0.01)
    p <- unlist(subset(mle,select=-c(country,type,model,profile,
                                     loglik,loglik.se,
                                     nfail.min,nfail.max,conv,etime)))
    coef(M,names(p)) <- unname(unlist(p))
    
    probe(M,probes=list(probe.acf(var="cases",lags=1,type="correlation")),
          nsim=500,seed=1878812716L) %>%
      as.data.frame() -> pb
    pb %>% mutate(sim=rownames(pb),
                  data=ifelse(sim=="data","data","simulation"),
                  type=type,
                  country=country)
    } %>% saveRDS(file="ebola-diagnostics-probes.rds")

## Additional diagnostics

## Run probes for each country
## Custom probe: exponential growth rate
probe.trend <- function (y) {
  cases <- y["cases",]
  df <- data.frame(week=seq_along(cases),cases=cases)
  fit <- lm(log1p(cases)~week,data=df)
  unname(coef(fit)[2])
  }

foreach (mle=iter(mles,by='row'),.combine=rbind) %dopar%
  {
    country=as.character(mle$country)
    type=as.character(mle$type)
    M <- ebolaModel(country=country,type=type,
                    na.rm=TRUE,nstage=3,timestep=0.01)
    p <- unlist(subset(mle,select=-c(country,type,model,profile,
                                     loglik,loglik.se,
                                     nfail.min,nfail.max,conv,etime)))
    coef(M,names(p)) <- unname(unlist(p))
    
    ## remove an exponential trend, give residuals on the log scale
    dm <- model.matrix(lm(log1p(cases)~time,data=as.data.frame(M)))
    rm <- diag(nrow(dm))-dm%*%solve(crossprod(dm))%*%t(dm)
    detrend <- function (x) log1p(x)%*%rm
    
    probe(M,probes=list(
      probe.acf(var="cases",lags=1,type="correlation"),
      sd=probe.sd(var="cases",transform=log1p),
      probe.quantile(var="cases",prob=c(0.9)),
      d=probe.acf(var="cases",lags=c(1,2,3),type="correlation",
                  transform=detrend),
      trend=probe.trend),
      nsim=2000,seed=2186L
      ) %>% as.data.frame() -> pb
    pb %>% mutate(sim=rownames(pb),
                  kind=ifelse(sim=="data","data","simulation"),
                  type=type,
                  country=country)
    } %>%
  melt(id=c("country","type","kind","sim"),variable.name="probe") %>%
  arrange(country,type,probe,kind,sim) %>%
  saveRDS(file="ebola-diagnostics-addl-probes.rds")

Forecasting

Below are the contents of the file forecasts.R, which performs all the forecasting computations. In a directory containing ebola-model.R, ebola-profiles.rds, and hosts, a command like

mpirun -hostfile hosts -np 101 Rscript --vanilla forecasts.R

will result in the execution of these computations.

Contents of the file forecasts.R:

require(pomp)
require(plyr)
require(reshape2)
require(magrittr)
options(stringsAsFactors=FALSE)

set.seed(988077383L)

require(foreach)
require(doMPI)
require(iterators)

source("ebola-model.R")

horizon <- 13

foreach (country=c("SierraLeone"),.inorder=TRUE,.combine=c) %:%
  foreach (type=c("raw","cum"),.inorder=TRUE,.combine=c) %do%
  {
    M1 <- ebolaModel(country=country,type=type,
                     timestep=0.01,nstageE=3,na.rm=TRUE)
    M2 <- ebolaModel(country=country,type="raw",
                     timestep=0.01,nstageE=3,na.rm=TRUE)
    time(M2) <- seq(from=1,to=max(time(M1))+horizon,by=1)
    M3 <- ebolaModel(country=country,type="raw",
                     timestep=0.01,nstageE=3,na.rm=TRUE)
    time(M3) <- seq(from=max(time(M1))+1,to=max(time(M1))+horizon,by=1)
    timezero(M3) <- max(time(M1))
    list(M1,M2,M3)
    } -> models
dim(models) <- c(3,2,1)
dimnames(models) <- list(c("fit","det.forecast","stoch.forecast"),
                         c("raw","cum"),c("SierraLeone"))

noexport <- c("models")

## Weighted quantile function
wquant <- function (x, weights, probs = c(0.025,0.5,0.975)) {
  idx <- order(x)
  x <- x[idx]
  weights <- weights[idx]
  w <- cumsum(weights)/sum(weights)
  rval <- approx(w,x,probs,rule=1)
  rval$y
  }

starts <- c(Guinea="2014-01-05",Liberia="2014-06-01",SierraLeone="2014-06-08")

cl <- startMPIcluster()
registerDoMPI(cl)

bake <- function (file, expr) {
  if (file.exists(file)) {
    readRDS(file)
    } else {
      val <- eval(expr)
      saveRDS(val,file=file)
      val
      }
  }

readRDS("profiles.rds") %>%
  ddply(~country+type+model,subset,loglik>max(loglik)-6,
        select=-c(conv,etime,loglik.se,nfail.min,nfail.max,profile)) -> mles

mles %>% melt(id=c("country","type","model"),variable.name='parameter') %>%
  ddply(~country+type+model+parameter,summarize,
        min=min(value),max=max(value)) %>%
  subset(parameter!="loglik") %>%
  melt(measure=c("min","max")) %>%
  acast(country~type~model~parameter~variable) -> ranges

mles %>% ddply(~country+type+model,subset,loglik==max(loglik),select=-loglik) %>%
  mutate(k=round(k,4),rho=round(rho,4),R0=round(R0,4),E_0=3*round(E_0/3)) %>%
  unique() %>%
  arrange(country,type,model) -> mles

### DETERMINISTIC MODELS

bake(file="ebola-forecasts_det.rds",{
  foreach (country=c("SierraLeone"),
           .inorder=TRUE,.combine=rbind) %:%
    foreach (type=c("raw","cum"),nsamp=c(1000,3000),
             .inorder=TRUE,.combine=rbind) %do%
    {
      
      params <- sobolDesign(lower=ranges[country,type,'det',,'min'],
                            upper=ranges[country,type,'det',,'max'],
                            nseq=nsamp)
      
      foreach(p=iter(params,by='row'),
              .inorder=FALSE,
              .combine=rbind,
              .noexport=noexport,
              .options.multicore=list(set.seed=TRUE),
              .options.mpi=list(chunkSize=10,seed=1568335316L,info=TRUE)
              ) %dopar%
        {
          M1 <- models["fit",type,country][[1]]
          M2 <- models["det.forecast",type,country][[1]]
          ll <- logLik(traj.match(M1,start=unlist(p)))
          x <- trajectory(M2,params=unlist(p))
          p <- parmat(unlist(p),20)
          rmeasure(M2,x=x,times=time(M2),params=p) %>%
            melt() %>%
            mutate(time=time(M2)[time],
                   period=ifelse(time<=max(time(M1)),"calibration","projection"),
                   loglik=ll)
        } %>% 
        subset(variable=="cases",select=-variable) %>%
        mutate(weight=exp(loglik-mean(loglik))) %>%
        arrange(time,rep) -> sims
      
      ess <- with(subset(sims,time==max(time)),weight/sum(weight))
      ess <- 1/sum(ess^2)
      cat("ESS det",country,type,"=",ess,"\n")
      
      sims %>%
        ddply(~time+period,summarize,prob=c(0.025,0.5,0.975),
              quantile=wquant(value,weights=weight,probs=prob)) %>%
        mutate(prob=mapvalues(prob,from=c(0.025,0.5,0.975),
                              to=c("lower","median","upper"))) %>%
        dcast(period+time~prob,value.var='quantile') %>%
        mutate(country=country,type=type)
      }
  }) -> fc_tm

### STOCHASTIC MODEL

bake(file="ebola-forecasts_stoch.rds",{
  foreach (country=c("SierraLeone"),
           .inorder=TRUE,.combine=rbind) %:%
    foreach (type=c("raw","cum"),nsamp=c(200,200),
             .inorder=TRUE,.combine=rbind) %do%
    {
      
      params <- sobolDesign(lower=ranges[country,type,'stoch',,'min'],
                            upper=ranges[country,type,'stoch',,'max'],
                            nseq=nsamp)
      
      foreach(p=iter(params,by='row'),
              .inorder=FALSE,
              .combine=rbind,
              .noexport=noexport,
              .options.multicore=list(set.seed=TRUE),
              .options.mpi=list(chunkSize=1,seed=1568335316L,info=TRUE)
              ) %dopar%
        {
          M1 <- models["fit",type,country][[1]]
          M2 <- models["stoch.forecast",type,country][[1]]
          pf <- pfilter(M1,params=unlist(p),Np=2000,save.states=TRUE)
          pf$saved.states %>% tail(1) %>% melt() %>%
            acast(variable~rep,value.var='value') %>%
            apply(2,function (x) {
              setNames(c(x["S"],sum(x[c("E1","E2","E3")]),x["I"],x["R"]),
                       c("S_0","E_0","I_0","R_0"))}) -> x
          pp <- parmat(unlist(p),ncol(x))
          pp[rownames(x),] <- x
          simulate(M2,params=pp,obs=TRUE) %>%
            melt() %>%
            mutate(time=time(M2)[time],
                   period=ifelse(time<=max(time(M1)),"calibration","projection"),
                   loglik=logLik(pf))
        } %>% subset(variable=="cases",select=-variable) %>%
        mutate(weight=exp(loglik-mean(loglik))) %>%
        arrange(time,rep) -> sims
      
      ess <- with(subset(sims,time==max(time)),weight/sum(weight))
      ess <- 1/sum(ess^2)
      cat("ESS stoch",country,type,"=",ess,"\n")
      
      sims %>% ddply(~time+period,summarize,prob=c(0.025,0.5,0.975),
                     quantile=wquant(value,weights=weight,probs=prob)) %>%
        mutate(prob=mapvalues(prob,from=c(0.025,0.5,0.975),
                              to=c("lower","median","upper"))) %>%
        dcast(period+time~prob,value.var='quantile') %>%
        mutate(country=country,type=type)
      }
  }) -> fc_if

ldply(list(stoch=fc_if,det=fc_tm),.id='model') %>%
  ddply(~country,mutate,
        model=factor(model,levels=c("stoch","det")),
        date=as.Date(starts[unique(as.character(country))])+7*(time-1)) %>%
  saveRDS(file='forecasts.rds')

closeCluster(cl)
mpi.quit()

References